#delimit;
* Work from the replication folder so the relative file names below resolve;
cd D:\Dropbox\zworking\r-patent\paper-trends\replication;
* Close a log left open by an earlier run (if any), then open a fresh one;
capture noisily log close;
log using patent-spatial-replication.log, replace;

* William Kerr, TGG;
* Core RP2 graphics and tables;
* Inputs are the patents working file and the ML software code;
* Working file is transferred with data;
* 2 appendix figures produced in R;

clear all;
set more off;

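/*********************************;
*** Baseline Prep             ***;
*** NOTE: disabled. The block comment opened on the first line of this banner runs to the closing marker after the save of patent-spatial-final1 ***;
*** so nothing in this section executes and the later sections load the saved file ***;
*********************************;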

* Prep AI data;
use doc_id predict50_any_ai flag_patent 
    if flag_patent==1 using ./data-ai/ai_model_predictions, clear;
ren doc_id patent_number; ren predict50_any_ai software_ai; drop flag_patent;
sort patent_number; save ./temp/patent-ai, replace;

* Load patent data;
use patent_number invdom msa1 utility ayear patent_kind domshare software univ ind gov individual 
    nber_cat1 uspc0 uspc1 gyear ass_id1         
    dENG dCHN dEUR dHIS dHIN dJAP dKOR dRUS dVNM
    num_claims backward forward fw_selfcite originality generality 
    if utility==1 & (domshare>0 & domshare!=.) & (ayear>=1975 & ayear<=2019) 
       & (nber_cat1>=1 & nber_cat<=6) & software!=. & dENG!=.
    using ./data-YE20/patents_working, clear;
drop domshare utility patent_kind;
sum;

* Merge software and summarize designation;
ren software software_bh;
sort patent_number; merge patent_number using ./data-YE20/extra/alt_software;
tab _m; drop if _m==2; drop _m; ren software software_gv;
sort patent_number; merge patent_number using ./data-YE20/extra/ml_software; 
tab _m; drop if _m==2; drop _m;
sort patent_number; merge patent_number using ./temp/patent-ai; 
tab _m; drop if _m==2; drop _m; erase ./temp/patent-ai.dta;
sum software_bh software_gv software_ml software_ai; 
sum software_bh software_gv software_ml software_ai if software_ai==1; 
pwcorr software_bh software_gv software_ml software_ai; 

* Generate city groups and periods;
codebook msa1; ren msa1 msa; replace msa=9999 if msa==.;
gen cluster=3;
* SF, BOS, SEA, SD, DEN, AUS;
for any 7362 1122 7602 7320 2082 640: replace cluster=1 if msa==X;
* NY, LA, CHI, PHL, DET ... (DC, DAL, HOU);
for any 5602 4472 1602 6162 2162: replace cluster=2 if msa==X;
replace cluster=4 if msa==. | msa==9999;
codebook msa if cluster==3;
gen period=1975;
for any 1980 1985 1990 1995 2000 2005 2010 2015: replace period=X if ayear>=X; 
compress;
save patent-spatial-final1, replace;
*/

*********************************;
*** Core Bessen-Hunt Results  ***;
*********************************;

* Reload the prepared patent file and drop the fields this section does not use;
use patent-spatial-final1, clear;
drop gyear ayear num_claims backward forward fw_selfcite originality generality dENG-dVNM uspc*;

* Organize patents and institutions;
* T columns are per-patent indicators: cttot counts every patent, ctsw is the software_bh flag, ctnsw its complement;
* I, U, G and N copies are filled only when ind==1, univ==1, gov==1, or all three are zero;
* C1, C2 and C3 copies group nber_cat codes 2 and 4, 1 and 3, and 5 and 6;
* Z1 to Z6 split the software and non-software indicators by single nber_cat code;
gen Tcttot=1; gen Tctsw=software_bh; gen Tctnsw=1-software_bh;
for any cttot ctsw ctnsw: gen IX=TX if ind==1 \ gen UX=TX if univ==1 \ gen GX=TX if gov==1 \ gen NX=TX if (ind==0 & gov==0 & univ==0);
for any cttot ctsw ctnsw: gen C1X=TX if (nber_cat==2 | nber_cat==4) \ gen C2X=TX if (nber_cat==1 | nber_cat==3) \ gen C3X=TX if (nber_cat==5 | nber_cat==6);
for num 1/6: gen ZXctsw=Tctsw if nber_cat==X \ gen ZXctnsw=Tctnsw if nber_cat==X;

* Collapse on MSA-period cells and merge population data;
* Prepare a zero-valued observation for 7000 in 1980;
* Fix pop min for early years of 380 (Anchorage AK) and 3320 (Honolulu HI);
* The sums turn each indicator into a count per MSA-period cell;
collapse (sum) T* I* U* G* N* C* Z* (mean) cluster, by(msa period) fast;
* Old-style match merge: _m==1 cells exist only in the patent data, _m==2 only in the population file;
sort msa period; merge msa period using period_population_msa;
tab _m; table msa _m if _m!=3; ren pop pop; 
* Population-only cells receive explicit zero counts;
for var T* I* U* G* N* C* Z*: replace X=0 if _m==2;
* Patent-only cells take the smallest population observed for the same MSA;
egen temp1=min(pop), by(msa); replace pop=temp1 if _m==1; drop _m temp1;

* Generate HHI and EG components and collapse;
* tab columns split each T count by cluster code 1 to 4;
for var T*: gen tabX1=X if cluster==1 \ gen tabX2=X if cluster==2 \ gen tabX3=X if cluster==3 \ gen tabX4=X if cluster==4;
* sh columns are each count (and pop) as a share of its period total, computed without msa 9999;
for var T* I* U* G* N* C* pop:
\ gen temp1=X if msa!=9999 \ egen sumX=sum(temp1), by(period) \ gen shX=X/sumX if msa!=9999  \ drop sumX temp1;
* hhi columns are squared shares and eg columns are squared gaps between a count share and the population share;
for var T* I* U* G* N* C* pop: gen hhiX=shX^2 if msa!=9999;
for var T* I* U* G* N* C*: gen egX=(shX-shpop)^2 if msa!=9999;
for var hhi* eg*: replace X=. if msa==9999;
* One row per period: the sums give totals, the HHI, and the unscaled EG numerator;
collapse (sum) T* I* U* G* N* C* Z* tab* hhi* eg*, by(period) fast;
* Keep the cluster counts as raw columns, then express the tab columns as shares of the period total;
for var tabTcttot*: gen rawX=X \ replace X=X/Tcttot;
for var tabTctsw*: gen rawX=X \ replace X=X/Tctsw;
for var tabTctnsw*: gen rawX=X \ replace X=X/Tctnsw;
* Divide the EG numerator by one minus the population HHI;
for var eg*: replace X=X/(1-hhipop);
* shsw is the software share of each group in percent;
for any T I U G N C1 C2 C3: gen shswX=100*Xctsw/Xcttot;
format tab* eg* hhi* %4.3f; format sh* %3.2f; format raw* %8.0f; 

* Table S1 is taken from the JEP article;

* Figure 1 - raw cluster counts for all, software and non-software patents;
for any tot sw nsw: list period rawtabTctX*, clean noobs;

* Figure 2 and Table S2 - the same cluster columns as shares of the period total;
for any tot sw nsw: list period tabTctX*, clean noobs;

* Figure 3 - EG columns for all patents and for the ind==1 and univ==1 groups;
for any T I U: list period egXcttot egXctsw egXctnsw, clean noobs;

* Figure 4 - software and non-software counts by nber_cat code;
list period Z*, clean noobs;

* Table S3 - counts, software share, HHI and EG for all patents and the three category groups;
for any T C1 C2 C3: list period Xcttot Xctsw shswX hhiXcttot hhiXctsw hhiXctnsw egXcttot egXctsw egXctnsw, clean noobs;

* Table S4 - the same columns by institution type;
for any I U G N: list period Xcttot Xctsw shswX hhiXcttot hhiXctsw hhiXctnsw egXcttot egXctsw egXctnsw, clean noobs;

*********************************;
*** non-MSA Stats Bessen-Hunt ***;
*********************************;

* San Francisco (msa 7362) - mean of the SF indicator by period, overall and by software flag;
use patent-spatial-final1, clear;
tabulate period, s(software_bh);
generate SF=(msa==7362);
tabulate period, s(SF);
tabulate period if software_bh==1, s(SF);
tabulate period if software_bh!=1, s(SF);

* Rust Belt - mean of the rust indicator by period;
*  Buffalo NY, Cincinnati OH, Cleveland OH, Columbus OH, Indianapolis IN, Milwaukee WI, Pittsburgh PA, and St. Louis MO;
use patent-spatial-final1, clear;
generate rust=0;
for num 1282 1642 1692 1840 3480 5082 6282 7040: replace rust=1 if msa==X;
tabulate period, s(rust);

* Table S5a/b - patent counts and mean patent measures by cluster-1 status, software flag and period;
use patent-spatial-final1, clear;
* Missing fw_selfcite is set to zero, then forwardext scales forward by one minus fw_selfcite;
replace fw_selfcite=0 if fw_selfcite==.;
generate forwardext=forward*(1-fw_selfcite);
drop fw_selfcite;
generate techcl=(cluster==1);
generate ct=1;
collapse (sum) ct
         (mean) num_claims backward forward forwardext originality generality,
         by(techcl software_bh period) fast;
format num_claims backward forward forwardext %3.1f;
format originality generality %4.3f;
sort techcl software_bh period;
* Software patents: cluster 1, then everywhere else;
list period ct num_claims backward forward forwardext originality generality
     if techcl==1 & software_bh==1, clean noobs;
list period ct num_claims backward forward forwardext originality generality
     if techcl==0 & software_bh==1, clean noobs;
* Non-software patents: cluster 1, then everywhere else;
list period ct num_claims backward forward forwardext originality generality
     if techcl==1 & software_bh==0, clean noobs;
list period ct num_claims backward forward forwardext originality generality
     if techcl==0 & software_bh==0, clean noobs;

* Table S6a/b/c - means of the d indicators, rescaled to percent;
* Cut 1 - by period;
use patent-spatial-final1, clear;
collapse (mean) dENG dCHN dEUR dHIS dHIN dJAP dKOR dRUS dVNM, by(period) fast;
for var dENG-dVNM: replace X=100*X;
format d* %3.1f;
list period d*, clean noobs;

* Cut 2 - by nber_cat1;
use patent-spatial-final1, clear;
collapse (mean) dENG dCHN dEUR dHIS dHIN dJAP dKOR dRUS dVNM, by(nber_cat1) fast;
for var dENG-dVNM: replace X=100*X;
format d* %3.1f;
list nber_cat1 d*, clean noobs;

* Cut 3 - by period, software patents listed first;
use patent-spatial-final1, clear;
collapse (mean) dENG dCHN dEUR dHIS dHIN dJAP dKOR dRUS dVNM, by(software_bh period) fast;
for var dENG-dVNM: replace X=100*X;
format d* %3.1f;
list period d* if software_bh==1, clean noobs;
list period d* if software_bh==0, clean noobs;

* Cut 4 - by nber_cat1, software patents listed first;
use patent-spatial-final1, clear;
collapse (mean) dENG dCHN dEUR dHIS dHIN dJAP dKOR dRUS dVNM, by(software_bh nber_cat1) fast;
for var dENG-dVNM: replace X=100*X;
format d* %3.1f;
list nber_cat1 d* if software_bh==1, clean noobs;
list nber_cat1 d* if software_bh==0, clean noobs;

* Figure S4 - annual means of dCHN, dHIN, dHIS, dRUS and of dOTH (the sum of dJAP, dKOR and dVNM);
use patent-spatial-final1, clear;
gen dOTH=dJAP+dKOR+dVNM;
* One row per ayear;
collapse (mean) dCHN dHIN dHIS dRUS dOTH, by(ayear) fast;
format d* %4.3f;
* List ayear with the means: with noobs the rows would otherwise carry no year label;
list ayear dCHN dHIN dHIS dRUS dOTH, clean noobs;

* Figure S5 - how the dENG (with dEUR folded in) and dETH totals split across clusters within each period;
* NOTE(review): table with c() and f() is the pre-Stata-17 syntax - confirm the Stata release or run under version control;
use patent-spatial-final1, clear;
* dENG absorbs dEUR and dETH is its complement, left missing where dENG is missing;
replace dENG=dENG+dEUR; gen dETH=1-dENG if dENG!=.;
collapse (sum) dENG dETH, by(cluster period) fast;
* Convert the cluster sums to shares of the period total - temp1 is dropped by its full name so the loop does not rely on variable abbreviation;
for any dENG dETH: egen temp1=sum(X), by(period) \ replace X=X/temp1 \ drop temp1;
table period cluster, c(mean dETH) f(%4.3f);
table period cluster, c(mean dENG) f(%4.3f);

* Same split computed separately for software and non-software patents;
use patent-spatial-final1, clear;
replace dENG=dENG+dEUR; gen dETH=1-dENG if dENG!=.;
collapse (sum) dENG dETH, by(cluster software_bh period) fast;
for any dENG dETH: egen temp1=sum(X), by(period software_bh) \ replace X=X/temp1 \ drop temp1;
for num 1 0: table period cluster if software_bh==X, c(mean dETH) f(%4.3f);
for num 1 0: table period cluster if software_bh==X, c(mean dENG) f(%4.3f);

* Software penetration into USPC fields;
* Among classes first seen before grant year 1980, flag those whose 2012 software share exceeds 5 and 25 percent;
use patent-spatial-final1, clear;
keep if gyear<=2012 & uspc0!="";
drop uspc1;
collapse (mean) software_bh, by(uspc gyear) fast;
egen temp1=min(gyear), by(uspc);
drop if temp1>=1980;
keep if gyear==2012;
generate sh05=(software_bh>0.05);
generate sh25=(software_bh>0.25);
summarize;

* Decomposition;
* Splits the change in the overall software share between grant years 1975-1977 (pd 1) and 2010-2012 (pd 2);
* into within-class, between-class and cross terms over the USPC classes present in both windows;
use patent-spatial-final1, clear;
keep if gyear<=2012 & uspc0!=""; drop uspc1;
gen pd=1 if (gyear>=1975 & gyear<=1977);
replace pd=2 if (gyear>=2010 & gyear<=2012);
drop if pd==.; gen ct=1;
* ct becomes the patent count and software_bh the software share of each class-window cell;
collapse (sum) ct (mean) software_bh, by(uspc pd);
* Keep classes observed in both windows and put the two windows side by side;
egen temp1=count(pd), by(uspc); keep if temp1==2; drop temp1;
reshape wide ct software_bh, i(uspc) j(pd);
* For each window: totct is the patent total, sh the class share of patents, totsw the patent-weighted software share;
for num 1/2:
\ egen totctX=sum(ctX) 
\ gen shX=ctX/totctX 
\ egen totswX=sum(ctX*software_bhX)
\ replace totswX=totswX/totctX;
* Within holds class shares at window 1 and moves the software share;
* Between moves class shares, weighting by the window 1 deviation from the overall share, and cross is the interaction;
egen Dwithin=sum(sh1*(software_bh2-software_bh1));
egen Dbtwn=sum((software_bh1-totsw1)*(sh2-sh1));
egen Dcross=sum((software_bh2-software_bh1)*(sh2-sh1));
* Dsw is the total change and Pgr1 its size relative to the window 1 level;
gen Dsw=totsw2-totsw1; 
gen Pgr1=Dsw/totsw1; 
* P columns express each term as a fraction of the total change;
for any within btwn cross: gen PX=DX/Dsw;
* The listed columns are constant across rows, so collapsing on the dummy n leaves one summary row;
gen n=0; collapse (mean) totsw1 totsw2 Pwithin Pbtwn Pcross Pgr1, by(n) fast;
* Ptot is the sum of the three fractions;
gen Ptot=Pwithin+Pbtwn+Pcross;
order n totsw1 totsw2 Pwithin Pbtwn Pcross Pgr1;
sum;

* MSA persistence - correlations across periods of each MSA share of software and non-software patents;
use patent-spatial-final1, clear;
drop if msa==9999;
generate Tctsw=software_bh;
generate Tctnsw=1-software_bh;
collapse (sum) T*, by(msa period) fast;
* SH columns are the MSA share of the period total - the counts are dropped once the shares exist;
for any sw nsw: egen temp=sum(TctX), by(period) \ generate SHX=TctX/temp \ drop temp TctX;
reshape wide SHsw SHnsw, i(msa) j(period);
for any sw nsw: pwcorr SHX*;

* Statistics re universities;
* Annualized growth of university patent counts by cluster between the 1975 and 2015 periods;
use patent-spatial-final1, clear;
keep if univ==1 & (period==1975 | period==2015);
generate ct=1;
collapse (sum) ct, by(cluster period);
reshape wide ct, i(cluster) j(period);
generate gr=(ct2015/ct1975)^(1/(2015-1975))-1;
list, clean noobs;

* Mean of univ by period - all patents, then software and non-software;
use patent-spatial-final1, clear;
tabulate period, s(univ);
tabulate period if software_bh==1, s(univ);
tabulate period if software_bh!=1, s(univ);

* Mean of univ for the largest assignee in each MSA-period, ranked on all patents;
* The sort puts the largest ct first within each MSA-period and the drop keeps only that first row;
generate ct=1;
drop if (ass_id1==. | msa==.);
collapse (sum) ct (mean) univ, by(ass_id1 msa period) fast;
gsort msa period -ct ass_id;
drop if msa==msa[_n-1] & period==period[_n-1];
tabulate period, s(univ);

* Same, ranking assignees on software patents;
use patent-spatial-final1, clear;
generate ct=software_bh;
drop if (ass_id1==. | msa==.);
collapse (sum) ct (mean) univ, by(ass_id1 msa period) fast;
gsort msa period -ct ass_id;
drop if msa==msa[_n-1] & period==period[_n-1];
tabulate period, s(univ);

* Same, ranking assignees on non-software patents;
use patent-spatial-final1, clear;
generate ct=1-software_bh;
drop if (ass_id1==. | msa==.);
collapse (sum) ct (mean) univ, by(ass_id1 msa period) fast;
gsort msa period -ct ass_id;
drop if msa==msa[_n-1] & period==period[_n-1];
tabulate period, s(univ);

*** Comparison of reallocation to bottom of distribution;
* Period 2015 only: MSA patent shares sorted from smallest to largest with a running cumulative share;
use if period==2015 using patent-spatial-final1, clear;
generate Tcttot=1;
collapse (sum) T*, by(msa) fast;
egen temp1=sum(Tcttot);
generate sh=Tcttot/temp1;
drop temp1;
gsort sh Tcttot;
generate csh=sh if _n==1;
replace csh=csh[_n-1]+sh if _n>1;
list;

********************************************;
*** Extension: Top 4 vs Next 3 Patent Gr ***;
********************************************;

* Rank MSAs by patent growth between the 1975 and 2010 periods;
use patent-spatial-final1, clear;
keep if period==1975 | period==2010;
generate Tct=1;
collapse (sum) T* (mean) cluster, by(msa period) fast;
reshape wide Tct, i(msa) j(period);

* Top 20 by growth ratio, first unrestricted and then only for MSAs with at least 50 patents in 1975;
generate grrate1=Tct2010/Tct1975;
gsort -grrate1;
list in 1/20, clean noobs;
replace grrate1=. if Tct1975<50;
gsort -grrate1;
list in 1/20, clean noobs;

* Top 20 by absolute change in patent counts;
generate chpat=Tct2010-Tct1975;
gsort -chpat;
list in 1/20, clean noobs;

* Rerun the cluster tabulation with narrower cluster 1 and cluster 2 definitions;
use patent-spatial-final1, clear;
drop gyear ayear num_claims backward forward fw_selfcite originality generality dENG-dVNM uspc*;
* Move four MSAs into cluster 3 so clusters 1 and 2 keep only their remaining members;
* Presumably Denver, Austin, Chicago and Philadelphia given the code order in the Baseline Prep comments - TODO confirm;
for any 2082 640 1602 6162: replace cluster=3 if msa==X;

* Organize patents and institutions;
* T columns are per-patent indicators: cttot counts every patent, ctsw is the software_bh flag, ctnsw its complement;
* I, U, G, N, C1 to C3 and Z1 to Z6 copies restrict those indicators by institution flag or nber_cat code;
gen Tcttot=1; gen Tctsw=software_bh; gen Tctnsw=1-software_bh;
for any cttot ctsw ctnsw: gen IX=TX if ind==1 \ gen UX=TX if univ==1 \ gen GX=TX if gov==1 \ gen NX=TX if (ind==0 & gov==0 & univ==0);
for any cttot ctsw ctnsw: gen C1X=TX if (nber_cat==2 | nber_cat==4) \ gen C2X=TX if (nber_cat==1 | nber_cat==3) \ gen C3X=TX if (nber_cat==5 | nber_cat==6);
for num 1/6: gen ZXctsw=Tctsw if nber_cat==X \ gen ZXctnsw=Tctnsw if nber_cat==X;

* Collapse on MSA-period cells and merge population data;
* Prepare a zero-valued observation for 7000 in 1980;
* Fix pop min for early years of 380 (Anchorage AK) and 3320 (Honolulu HI);
collapse (sum) T* I* U* G* N* C* Z* (mean) cluster, by(msa period) fast;
* Old-style match merge: _m==1 cells exist only in the patent data, _m==2 only in the population file;
sort msa period; merge msa period using period_population_msa;
tab _m; table msa _m if _m!=3; ren pop pop; 
* Population-only cells receive explicit zero counts;
for var T* I* U* G* N* C* Z*: replace X=0 if _m==2;
* Patent-only cells take the smallest population observed for the same MSA;
egen temp1=min(pop), by(msa); replace pop=temp1 if _m==1; drop _m temp1;

* Generate components and collapse;
* tab columns split each T count by cluster code 1 to 4;
for var T*: gen tabX1=X if cluster==1 \ gen tabX2=X if cluster==2 \ gen tabX3=X if cluster==3 \ gen tabX4=X if cluster==4;
* NOTE(review): the sh columns built here are not listed in the collapse below and so are discarded by it;
for var T* I* U* G* N* C* pop:
\ gen temp1=X if msa!=9999 \ egen sumX=sum(temp1), by(period) \ gen shX=X/sumX if msa!=9999  \ drop sumX temp1;
collapse (sum) T* I* U* G* N* C* Z* tab*, by(period) fast;
* Express the tab columns as shares of the period total;
for var tabTcttot*: replace X=X/Tcttot;
for var tabTctsw*: replace X=X/Tctsw;
for var tabTctnsw*: replace X=X/Tctnsw;
* shsw is the software share of each group in percent;
for any T I U G N C1 C2 C3: gen shswX=100*Xctsw/Xcttot;
format tab* %4.3f; format sh* %3.2f; 

* Table S7 - Equivalent to Figure 2 & Table S2;
for any tot sw nsw:
list period tabTctX*, clean noobs;

*********************************;
*** Super-Linear Calculation  ***;
*********************************;

* Build MSA-period counts of all, software and non-software patents and attach population;
use patent-spatial-final1, clear;
generate Tcttot=1;
generate Tctsw=software_bh;
generate Tctnsw=1-software_bh;
collapse (sum) T* (mean) cluster, by(msa period) fast;
sort msa period;
merge msa period using period_population_msa;
tabulate _m;
table msa _m if _m!=3;
rename pop pop;
* Population-only cells get zero counts, and every count below one is raised to one before logs are taken;
for var T*: replace X=0 if _m==2;
for var T*: replace X=1 if X<1;
* Patent-only cells take the smallest population observed for the same MSA;
egen temp1=min(pop), by(msa);
replace pop=temp1 if _m==1;
drop _m temp1;
drop if msa==9999;

* Table S8: True linear analysis;
* Log counts regressed on log population, period by period, with robust standard errors;
for var T* pop: generate lX=ln(X);
bysort period: regress lTcttot lpop, r;
bysort period: regress lTctsw lpop, r;
bysort period: regress lTctnsw lpop, r;

* Show 1975-1979 allocations;
* msash is each cluster 1 or 2 MSA share of the combined cluster 1 and 2 patents in period 1975;
generate temp1=Tcttot if (cluster==1 | cluster==2) & period==1975;
egen temp2=sum(temp1), by(period);
generate temp3=Tcttot/temp2 if (cluster==1 | cluster==2) & period==1975;
tabulate msa if (cluster==1 | cluster==2) & period==1975, s(temp3);
egen msash=min(temp3), by(msa);
drop temp*;

* Table S8: If no reallocation;
* Tctrev redistributes each period total of cluster 1 and 2 patents using the fixed 1975 shares;
generate temp1=Tcttot if (cluster==1 | cluster==2);
egen temp2=sum(temp1), by(period);
generate Tctrev=Tcttot;
replace Tctrev=temp2*msash if (cluster==1 | cluster==2);
drop temp*;
for var Tctrev: generate lX=ln(X);
bysort period: regress lTctrev lpop, r;

********************************************;
*** Core Graham-Vishnubhakat Results     ***;
********************************************;

* Same pipeline as the Bessen-Hunt section with software_gv as the software flag;
* Only patents with a non-missing software_gv and a period other than 2015 are loaded;
use if period!=2015 & software_gv!=. using patent-spatial-final1, clear;
drop gyear ayear num_claims backward forward fw_selfcite originality generality dENG-dVNM uspc*;

* Organize patents and institutions;
* T columns are per-patent indicators: cttot counts every patent, ctsw is the software_gv flag, ctnsw its complement;
* I, U, G, N, C1 to C3 and Z1 to Z6 copies restrict those indicators by institution flag or nber_cat code;
gen Tcttot=1; gen Tctsw=software_gv; gen Tctnsw=1-software_gv;
for any cttot ctsw ctnsw: gen IX=TX if ind==1 \ gen UX=TX if univ==1 \ gen GX=TX if gov==1 \ gen NX=TX if (ind==0 & gov==0 & univ==0);
for any cttot ctsw ctnsw: gen C1X=TX if (nber_cat==2 | nber_cat==4) \ gen C2X=TX if (nber_cat==1 | nber_cat==3) \ gen C3X=TX if (nber_cat==5 | nber_cat==6);
for num 1/6: gen ZXctsw=Tctsw if nber_cat==X \ gen ZXctnsw=Tctnsw if nber_cat==X;

* Collapse on MSA-period cells and merge population data;
* Prepare a zero-valued observation for 7000 in 1980;
* Fix pop min for early years of 380 (Anchorage AK) and 3320 (Honolulu HI);
collapse (sum) T* I* U* G* N* C* Z* (mean) cluster, by(msa period) fast;
* Old-style match merge: _m==1 cells exist only in the patent data, _m==2 only in the population file;
sort msa period; merge msa period using period_population_msa;
* Remove any period 2015 rows that the population file brings back in;
drop if period==2015;
tab _m; table msa _m if _m!=3; ren pop pop; 
* Population-only cells receive explicit zero counts;
for var T* I* U* G* N* C* Z*: replace X=0 if _m==2;
* Patent-only cells take the smallest population observed for the same MSA;
egen temp1=min(pop), by(msa); replace pop=temp1 if _m==1; drop _m temp1;

* Generate HHI and EG components and collapse;
* tab columns split each T count by cluster code 1 to 4;
for var T*: gen tabX1=X if cluster==1 \ gen tabX2=X if cluster==2 \ gen tabX3=X if cluster==3 \ gen tabX4=X if cluster==4;
* sh columns are each count (and pop) as a share of its period total, computed without msa 9999;
for var T* I* U* G* N* C* pop:
\ gen temp1=X if msa!=9999 \ egen sumX=sum(temp1), by(period) \ gen shX=X/sumX if msa!=9999  \ drop sumX temp1;
* hhi columns are squared shares and eg columns are squared gaps between a count share and the population share;
for var T* I* U* G* N* C* pop: gen hhiX=shX^2 if msa!=9999;
for var T* I* U* G* N* C*: gen egX=(shX-shpop)^2 if msa!=9999;
for var hhi* eg*: replace X=. if msa==9999;
* One row per period: the sums give totals, the HHI, and the unscaled EG numerator;
collapse (sum) T* I* U* G* N* C* Z* tab* hhi* eg*, by(period) fast;
* Keep the cluster counts as raw columns, then express the tab columns as shares of the period total;
for var tabTcttot*: gen rawX=X \ replace X=X/Tcttot;
for var tabTctsw*: gen rawX=X \ replace X=X/Tctsw;
for var tabTctnsw*: gen rawX=X \ replace X=X/Tctnsw;
* Divide the EG numerator by one minus the population HHI;
for var eg*: replace X=X/(1-hhipop);
* shsw is the software share of each group in percent;
for any T I U G N C1 C2 C3: gen shswX=100*Xctsw/Xcttot;
format tab* eg* hhi* %4.3f; format sh* %3.2f; format raw* %8.0f; 

* Table S10a - Equivalent to Figure 2 & Table S2;
for any tot sw nsw:
list period tabTctX*, clean noobs;

* Table S11a - Equivalent to Table S3;
for any T:
list period Xcttot Xctsw shswX hhiXcttot hhiXctsw hhiXctnsw egXcttot egXctsw egXctnsw, clean noobs;

*********************************;
*** Core ML Analysis         ***;
*********************************;

* Same pipeline as the Bessen-Hunt section with software_ml as the software flag;
* Only patents with a non-missing software_ml are loaded;
use if software_ml!=. using patent-spatial-final1, clear;
drop gyear ayear num_claims backward forward fw_selfcite originality generality dENG-dVNM uspc*;

* Organize patents and institutions;
* T columns are per-patent indicators: cttot counts every patent, ctsw is the software_ml flag, ctnsw its complement;
* I, U, G, N, C1 to C3 and Z1 to Z6 copies restrict those indicators by institution flag or nber_cat code;
gen Tcttot=1; gen Tctsw=software_ml; gen Tctnsw=1-software_ml;
for any cttot ctsw ctnsw: gen IX=TX if ind==1 \ gen UX=TX if univ==1 \ gen GX=TX if gov==1 \ gen NX=TX if (ind==0 & gov==0 & univ==0);
for any cttot ctsw ctnsw: gen C1X=TX if (nber_cat==2 | nber_cat==4) \ gen C2X=TX if (nber_cat==1 | nber_cat==3) \ gen C3X=TX if (nber_cat==5 | nber_cat==6);
for num 1/6: gen ZXctsw=Tctsw if nber_cat==X \ gen ZXctnsw=Tctnsw if nber_cat==X;

* Collapse on MSA-period cells and merge population data;
* Prepare a zero-valued observation for 7000 in 1980;
* Fix pop min for early years of 380 (Anchorage AK) and 3320 (Honolulu HI);
collapse (sum) T* I* U* G* N* C* Z* (mean) cluster, by(msa period) fast;
* Old-style match merge: _m==1 cells exist only in the patent data, _m==2 only in the population file;
sort msa period; merge msa period using period_population_msa;
tab _m; table msa _m if _m!=3; ren pop pop; 
* Population-only cells receive explicit zero counts;
for var T* I* U* G* N* C* Z*: replace X=0 if _m==2;
* Patent-only cells take the smallest population observed for the same MSA;
egen temp1=min(pop), by(msa); replace pop=temp1 if _m==1; drop _m temp1;

* Generate HHI and EG components and collapse;
* tab columns split each T count by cluster code 1 to 4;
for var T*: gen tabX1=X if cluster==1 \ gen tabX2=X if cluster==2 \ gen tabX3=X if cluster==3 \ gen tabX4=X if cluster==4;
* sh columns are each count (and pop) as a share of its period total, computed without msa 9999;
for var T* I* U* G* N* C* pop:
\ gen temp1=X if msa!=9999 \ egen sumX=sum(temp1), by(period) \ gen shX=X/sumX if msa!=9999  \ drop sumX temp1;
* hhi columns are squared shares and eg columns are squared gaps between a count share and the population share;
for var T* I* U* G* N* C* pop: gen hhiX=shX^2 if msa!=9999;
for var T* I* U* G* N* C*: gen egX=(shX-shpop)^2 if msa!=9999;
for var hhi* eg*: replace X=. if msa==9999;
* One row per period: the sums give totals, the HHI, and the unscaled EG numerator;
collapse (sum) T* I* U* G* N* C* Z* tab* hhi* eg*, by(period) fast;
* Keep the cluster counts as raw columns, then express the tab columns as shares of the period total;
for var tabTcttot*: gen rawX=X \ replace X=X/Tcttot;
for var tabTctsw*: gen rawX=X \ replace X=X/Tctsw;
for var tabTctnsw*: gen rawX=X \ replace X=X/Tctnsw;
* Divide the EG numerator by one minus the population HHI;
for var eg*: replace X=X/(1-hhipop);
* shsw is the software share of each group in percent;
for any T I U G N C1 C2 C3: gen shswX=100*Xctsw/Xcttot;
format tab* eg* hhi* %4.3f; format sh* %3.2f; format raw* %8.0f; 

* Table S10b - Equivalent to Figure 2 & Table S2;
for any tot sw nsw:
list period tabTctX*, clean noobs;

* Table S11b - Equivalent to Table S3;
for any T:
list period Xcttot Xctsw shswX hhiXcttot hhiXctsw hhiXctnsw egXcttot egXctsw egXctnsw, clean noobs;

*********************************;
*** ML Extra Analysis Text    ***;
*********************************;

* Cluster shares and concentration for patents grouped into three bins of the variable probability;
* probability is not created in this file - presumably it arrives with the ml_software merge in the Baseline Prep section - TODO confirm;
use if software_ml!=. using patent-spatial-final1, clear;
* Bins are (0, 0.1], (0.1, 0.8] and (0.8, 1];
* NOTE(review): a probability of exactly 0 falls in no bin yet still counts in Tcttot - confirm this is intended;
gen Tct00=(probability>0 & probability<=.1);
gen Tct10=(probability>0.1 & probability<=.8);
gen Tct80=(probability>0.8 & probability<=1);
gen Tcttot=1; sum Tct*;

* Collapse on MSA-period cells and merge population data;
collapse (sum) T* (mean) cluster, by(msa period) fast;
* Old-style match merge: _m==1 cells exist only in the patent data, _m==2 only in the population file;
sort msa period; merge msa period using period_population_msa;
tab _m; table msa _m if _m!=3; ren pop pop; 
* Population-only cells receive explicit zero counts;
for var T*: replace X=0 if _m==2;
* Patent-only cells take the smallest population observed for the same MSA;
egen temp1=min(pop), by(msa); replace pop=temp1 if _m==1; drop _m temp1;

* Generate HHI and EG components and collapse;
* tab columns split each T count by cluster code 1 to 4;
for var T*: gen tabX1=X if cluster==1 \ gen tabX2=X if cluster==2 \ gen tabX3=X if cluster==3 \ gen tabX4=X if cluster==4;
* sh columns are each count (and pop) as a share of its period total, computed without msa 9999;
for var T* pop:
\ gen temp1=X if msa!=9999 \ egen sumX=sum(temp1), by(period) \ gen shX=X/sumX if msa!=9999  \ drop sumX temp1;
* hhi columns are squared shares and eg columns are squared gaps between a count share and the population share;
for var T* pop: gen hhiX=shX^2 if msa!=9999;
for var T*: gen egX=(shX-shpop)^2 if msa!=9999;
for var hhi* eg*: replace X=. if msa==9999;
collapse (sum) T* tab* hhi* eg*, by(period) fast;
* Divide the EG numerator by one minus the population HHI;
for var eg*: replace X=X/(1-hhipop);
* Within each bin, rescale the four cluster columns by their row total so they sum to one;
for any 00 10 80: 
\ egen temp1=rsum(tabTctX*) 
\ replace tabTctX1=tabTctX1/temp1 
\ replace tabTctX2=tabTctX2/temp1 
\ replace tabTctX3=tabTctX3/temp1 
\ replace tabTctX4=tabTctX4/temp1 
\ drop temp1;
* sh columns are each bin as a percent of all patents in the period;
for any 00 10 80: gen shX=100*TctX/Tcttot;
format tab* eg* hhi* %4.3f; format sh* %3.2f; 

list period tabTct00*, clean noobs;
list period tabTct10*, clean noobs;
list period tabTct80*, clean noobs;
list period sh*, clean noobs;
list period eg*, clean noobs;

*********************************;
*** Core AI Analysis          ***;
*********************************;

* Same pipeline as the Bessen-Hunt section with software_ai as the software flag;
* Only patents with a non-missing software_ai are loaded;
use if software_ai!=. using patent-spatial-final1, clear;
drop gyear ayear num_claims backward forward fw_selfcite originality generality dENG-dVNM uspc*;

* Organize patents and institutions;
* T columns are per-patent indicators: cttot counts every patent, ctsw is the software_ai flag, ctnsw its complement;
* I, U, G, N, C1 to C3 and Z1 to Z6 copies restrict those indicators by institution flag or nber_cat code;
gen Tcttot=1; gen Tctsw=software_ai; gen Tctnsw=1-software_ai;
for any cttot ctsw ctnsw: gen IX=TX if ind==1 \ gen UX=TX if univ==1 \ gen GX=TX if gov==1 \ gen NX=TX if (ind==0 & gov==0 & univ==0);
for any cttot ctsw ctnsw: gen C1X=TX if (nber_cat==2 | nber_cat==4) \ gen C2X=TX if (nber_cat==1 | nber_cat==3) \ gen C3X=TX if (nber_cat==5 | nber_cat==6);
for num 1/6: gen ZXctsw=Tctsw if nber_cat==X \ gen ZXctnsw=Tctnsw if nber_cat==X;

* Collapse on MSA-period cells and merge population data;
* Prepare a zero-valued observation for 7000 in 1980;
* Fix pop min for early years of 380 (Anchorage AK) and 3320 (Honolulu HI);
collapse (sum) T* I* U* G* N* C* Z* (mean) cluster, by(msa period) fast;
* Old-style match merge: _m==1 cells exist only in the patent data, _m==2 only in the population file;
sort msa period; merge msa period using period_population_msa;
tab _m; table msa _m if _m!=3; ren pop pop; 
* Population-only cells receive explicit zero counts;
for var T* I* U* G* N* C* Z*: replace X=0 if _m==2;
* Patent-only cells take the smallest population observed for the same MSA;
egen temp1=min(pop), by(msa); replace pop=temp1 if _m==1; drop _m temp1;

* Generate HHI and EG components and collapse;
* tab columns split each T count by cluster code 1 to 4;
for var T*: gen tabX1=X if cluster==1 \ gen tabX2=X if cluster==2 \ gen tabX3=X if cluster==3 \ gen tabX4=X if cluster==4;
* sh columns are each count (and pop) as a share of its period total, computed without msa 9999;
for var T* I* U* G* N* C* pop:
\ gen temp1=X if msa!=9999 \ egen sumX=sum(temp1), by(period) \ gen shX=X/sumX if msa!=9999  \ drop sumX temp1;
* hhi columns are squared shares and eg columns are squared gaps between a count share and the population share;
for var T* I* U* G* N* C* pop: gen hhiX=shX^2 if msa!=9999;
for var T* I* U* G* N* C*: gen egX=(shX-shpop)^2 if msa!=9999;
for var hhi* eg*: replace X=. if msa==9999;
* One row per period: the sums give totals, the HHI, and the unscaled EG numerator;
collapse (sum) T* I* U* G* N* C* Z* tab* hhi* eg*, by(period) fast;
* Keep the cluster counts as raw columns, then express the tab columns as shares of the period total;
for var tabTcttot*: gen rawX=X \ replace X=X/Tcttot;
for var tabTctsw*: gen rawX=X \ replace X=X/Tctsw;
for var tabTctnsw*: gen rawX=X \ replace X=X/Tctnsw;
* Divide the EG numerator by one minus the population HHI;
for var eg*: replace X=X/(1-hhipop);
* shsw is the software share of each group in percent;
for any T I U G N C1 C2 C3: gen shswX=100*Xctsw/Xcttot;
format tab* eg* hhi* %4.3f; format sh* %3.2f; format raw* %8.0f; 

* Table S10c - Equivalent to Figure 2 & Table S2;
for any tot sw nsw:
list period tabTctX*, clean noobs;

* Table S11c - Equivalent to Table S3;
* NOTE(review): this label previously read S11b, duplicating the ML section label - confirm the table number;
for any T:
list period Xcttot Xctsw shswX hhiXcttot hhiXctsw hhiXctnsw egXcttot egXctsw egXctnsw, clean noobs;

*** End of program;
log close;
* exit ends the do-file cleanly - the former stop is not a Stata command and halted the run with an unrecognized-command error;
exit;
